0.1 Introduction:

  1. Data Preparation:
# Read the raw CSV, normalise the column names, drop the exported index
# column `x`, then turn every column into text in which commas are replaced
# by dots and asterisks are removed.
whr2015_2022 <- here::here("data/world-happiness-report-2015-2022-cleaned.csv") %>%
  read.csv(stringsAsFactors = FALSE) %>%
  janitor::clean_names() %>%
  select(-x) %>%
  lapply(function(column) gsub("\\*", "", gsub(",", ".", column))) %>%
  data.frame()

# The measurement columns are text after the clean-up above; make them
# numeric again. Country and region stay as character.
columns_to_convert <- c(
  "happiness_rank",
  "happiness_score",
  "economy_gdp_per_capita",
  "family_social_support",
  "health_life_expectancy",
  "freedom",
  "trust_government_corruption",
  "generosity",
  "year"
)
whr2015_2022 <- whr2015_2022 %>%
  mutate(across(all_of(columns_to_convert), as.numeric))

# Lookup of variant country spellings (names) to the single spelling used in
# the rest of the analysis (values). Spellings containing ". " instead of
# ", " exist because commas were replaced by dots in every column earlier.
country_name_mapping <- c(
  "Taiwan Province of China"  = "Taiwan",
  "Hong Kong S.A.R. of China" = "Hong Kong",
  "Hong Kong S.A.R., China"   = "Hong Kong",
  "Hong Kong S.A.R.. China"   = "Hong Kong",
  "Czechia"                   = "Czech Republic",
  "North Macedonia"           = "Macedonia",
  "Trinidad & Tobago"         = "Trinidad and Tobago",
  "North Cyprus"              = "Northern Cyprus",
  "Somaliland region"         = "Somalia",
  "Somaliland Region"         = "Somalia",
  "Palestinian Territories"   = "Palestine",
  "Eswatini. Kingdom of"      = "Swaziland"
)

# Rewrite every variant spelling in the country column to its unified name
whr2015_2022[["country"]] <- mapvalues(
  whr2015_2022[["country"]],
  from = names(country_name_mapping),
  to = country_name_mapping
)

# Lookup of variant region labels (names) to the label kept in the data
# (values)
region_name_mapping <- c(
  "Eastern Asia"                    = "East Asia",
  "Southeastern Asia"               = "Southeast Asia",
  "Southern Asia"                   = "South Asia",
  "Middle East and Northern Africa" = "Middle East and North Africa"
)

# Rewrite every variant label in the region column to its unified label
whr2015_2022[["region"]] <- mapvalues(
  whr2015_2022[["region"]],
  from = names(region_name_mapping),
  to = region_name_mapping
)

# Region overrides: countries (names) whose region is forced to a fixed
# value (values), regardless of what the source rows contain.
# The former "North Cyprus" entry was removed: that spelling is rewritten to
# "Northern Cyprus" by the country-name mapping before this step, so the
# entry could never match, and its region contradicted the
# "Northern Cyprus" entry below.
correct_regions <- c(
  "Armenia" = "Central and Eastern Europe",
  "Australia" = "Australia and New Zealand",
  "Taiwan" = "East Asia",
  "Belize" = "Latin America and Caribbean",
  "Hong Kong" = "East Asia",
  "Somalia" = "Sub-Saharan Africa",
  "Namibia" = "Sub-Saharan Africa",
  "South Sudan" = "Sub-Saharan Africa",
  "Trinidad and Tobago" = "Latin America and Caribbean",
  "Macedonia" = "Central and Eastern Europe",
  "Gambia" = "Sub-Saharan Africa",
  "Luxembourg" = "Western Europe",
  "Czech Republic" = "Central and Eastern Europe",
  "Guatemala" = "Latin America and Caribbean",
  "Kuwait" = "Middle East and North Africa",
  "Belarus" = "Central and Eastern Europe",
  "Turkmenistan" = "Central and Eastern Europe",
  "Libya" = "Middle East and North Africa",
  "Azerbaijan" = "Central and Eastern Europe",
  "Liberia" = "Sub-Saharan Africa",
  "Congo" = "Sub-Saharan Africa",
  "Niger" = "Sub-Saharan Africa",
  "Comoros" = "Sub-Saharan Africa",
  "Palestine" = "Middle East and North Africa",
  "Swaziland" = "Sub-Saharan Africa",
  "Madagascar" = "Sub-Saharan Africa",
  "Chad" = "Sub-Saharan Africa",
  "Yemen" = "Middle East and North Africa",
  "Mauritania" = "Sub-Saharan Africa",
  "Lesotho" = "Sub-Saharan Africa",
  "Botswana" = "Sub-Saharan Africa",
  "Rwanda" = "Sub-Saharan Africa",
  "Canada" = "North America",
  "Georgia" = "Central and Eastern Europe",
  "Kazakhstan" = "Central and Eastern Europe",
  "Kyrgyzstan" = "Central and Eastern Europe",
  "Moldova" = "Central and Eastern Europe",
  "New Zealand" = "Australia and New Zealand",
  "Russia" = "Central and Eastern Europe",
  "Tajikistan" = "Central and Eastern Europe",
  "Ukraine" = "Central and Eastern Europe",
  "United States" = "North America",
  "Uzbekistan" = "Central and Eastern Europe",
  "Northern Cyprus" = "Western Europe"
)

# Look up the override for every row at once. Countries without an entry
# yield NA and keep the region they already have.
region_override <- unname(correct_regions[whr2015_2022$country])
has_override <- !is.na(region_override)
whr2015_2022$region[has_override] <- region_override[has_override]

# View the updated data
glimpse(whr2015_2022)
## Rows: 1,229
## Columns: 11
## $ happiness_rank              <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,…
## $ country                     <chr> "Switzerland", "Iceland", "Denmark", "Norw…
## $ region                      <chr> "Western Europe", "Western Europe", "Weste…
## $ happiness_score             <dbl> 7.587, 7.561, 7.527, 7.522, 7.427, 7.406, …
## $ economy_gdp_per_capita      <dbl> 1.39651, 1.30232, 1.32548, 1.45900, 1.3262…
## $ family_social_support       <dbl> 1.34951, 1.40223, 1.36058, 1.33095, 1.3226…
## $ health_life_expectancy      <dbl> 0.94143, 0.94784, 0.87464, 0.88521, 0.9056…
## $ freedom                     <dbl> 0.66557, 0.62877, 0.64938, 0.66973, 0.6329…
## $ trust_government_corruption <dbl> 0.41978, 0.14145, 0.48357, 0.36503, 0.3295…
## $ generosity                  <dbl> 0.29678, 0.43630, 0.34139, 0.34699, 0.4581…
## $ year                        <dbl> 2015, 2015, 2015, 2015, 2015, 2015, 2015, …
  2. Data exploration:
# Print a per-column summary of the cleaned data (missing counts, unique
# values, and distribution statistics for the numeric columns)
skim(whr2015_2022)
Data summary
Name whr2015_2022
Number of rows 1229
Number of columns 11
_______________________
Column type frequency:
character 2
numeric 9
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
country 0 1 4 24 0 165 0
region 0 1 9 28 0 10 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
happiness_rank 0 1 77.47 44.47 1.0 39.00 77.00 116.00 158.00 ▇▇▇▇▇
happiness_score 0 1 5.43 1.12 2.4 4.58 5.41 6.22 7.84 ▁▅▇▇▃
economy_gdp_per_capita 0 1 0.98 0.43 0.0 0.67 1.01 1.30 2.21 ▃▅▇▃▁
family_social_support 0 1 1.03 0.33 0.0 0.83 1.07 1.27 1.64 ▁▂▆▇▅
health_life_expectancy 0 1 0.61 0.24 0.0 0.44 0.64 0.79 1.14 ▂▅▇▇▂
freedom 0 1 0.44 0.15 0.0 0.34 0.46 0.56 0.74 ▁▃▆▇▃
trust_government_corruption 0 1 0.13 0.11 0.0 0.06 0.10 0.16 0.59 ▇▃▁▁▁
generosity 0 1 0.20 0.12 0.0 0.12 0.19 0.26 0.84 ▇▇▂▁▁
year 0 1 2018.45 2.28 2015.0 2016.00 2018.00 2020.00 2022.00 ▇▃▇▃▇

0.2 Data visualization:

# Build one row per country for the map. The score is averaged over all
# report years so the plotted value matches the "2015-2022" map title.
# NOTE(review): assumes joinCountryData2Map() attaches a single data row to
# each country, so the unaggregated multi-year rows would not all be
# shown -- confirm against the rworldmap documentation.
country_data <- aggregate(
  happiness_score ~ country,
  data = whr2015_2022,
  FUN = mean
)
names(country_data) <- c("country", "value")

# Define the colors for the low, mid, and high values
low_color <- "#FF9999"  # Softer red
mid_color <- "#FFFF99"  # Softer yellow
high_color <- "#99CC99" # Softer green

# Create the color palette function
cols <- colorRampPalette(c(low_color, mid_color, high_color))

# One colour per map class. The previous `length(whr2015_2022)` returned the
# number of data-frame columns, which is unrelated to the number of classes.
num_map_categories <- 7
palette_colors <- cols(num_map_categories)

# Join the scores to the world map by country name. The join summary that
# would be printed is discarded; nullfile() is the null device on every
# platform, whereas the literal 'NUL' only works on Windows.
capture.output(
  n <- joinCountryData2Map(country_data,
                           joinCode = "NAME",
                           nameJoinColumn = "country"),
  file = nullfile()
)

# Draw the choropleth and its Low/Medium/High legend on the active graphics
# device. Shared by the PDF export and the on-screen plot below.
draw_happiness_map <- function(map_data) {
  mapCountryData(map_data,
                 nameColumnToPlot = "value",
                 mapTitle = "World Map for Happiness Score 2015-2022",
                 catMethod = "quantiles",
                 numCats = num_map_categories,
                 colourPalette = palette_colors,
                 oceanCol = "#F0F8FF",
                 missingCountryCol = "#CCCCCCCC",
                 addLegend = TRUE,
                 aspect = 1.1,
                 borderCol = "Black",
                 lwd = .1)

  legend("bottom",  # Adjust position as needed
         legend = c("Low", "Medium", "High"),  # Example categories
         fill = c(low_color, mid_color, high_color),  # Corresponding colors
         title = "Happiness Score",  # Title of the legend
         cex = 0.8)  # Adjust text size as needed
}

# Output plot in pdf; the device is closed even if drawing fails
pdf("world_happiness_map.pdf", width = 10, height = 7)
invisible(tryCatch(
  draw_happiness_map(n),
  finally = dev.off()
))

# Output plot in R console
invisible(draw_happiness_map(n))

# Plot 1: happiness score against GDP per capita with a LOESS trend line.
# `method` is the argument that selects the smoother; the previous
# `lm = loess` is not a geom_smooth() parameter and was ignored.
ggplot(whr2015_2022, aes(x = economy_gdp_per_capita, y = happiness_score)) +
  geom_point() +
  geom_smooth(method = "loess", formula = y ~ x) +
  labs(title = "GDP per capita vs. Happiness Score",
       x = "GDP per capita",
       y = "Happiness Score")

# Plot 2: happiness score against health (life expectancy) with a LOESS
# trend line. `method` is the argument that selects the smoother; the
# previous `lm = loess` is not a geom_smooth() parameter and was ignored.
ggplot(whr2015_2022, aes(x = health_life_expectancy, y = happiness_score)) +
  geom_point() +
  geom_smooth(method = "loess", formula = y ~ x) +
  labs(title = "Health (Life expectancy) vs. Happiness Score",
       x = "Health (Life expectancy)",
       y = "Happiness Score")

# Plot 3: distribution of freedom within one-point-wide bands of happiness
# score. A boxplot needs a grouping on a continuous x axis; without one all
# rows collapse into a single box.
ggplot(whr2015_2022, aes(x = happiness_score, y = freedom)) +
  geom_boxplot(aes(group = cut_width(happiness_score, 1))) +
  labs(title = "Happiness Score vs. Freedom",
       x = "Happiness Score",
       y = "Freedom")

# Keep only the 2022 report rows
whr2022 <- whr2015_2022 %>%
  filter(year == 2022)

# Select top 10 and bottom 10 countries based on happiness score.
# head()/tail() on the row positions plus unique() keeps the selection valid
# (no out-of-range or repeated rows) when fewer than 20 rows are available.
top10_bottom10_countries <- whr2022 %>%
  arrange(desc(happiness_score)) %>%
  slice(unique(c(head(seq_len(n()), 10), tail(seq_len(n()), 10))))

# Bubble chart of happiness against GDP per capita for the selected
# countries; bubble size follows the score and colour follows the region.
ggplot(top10_bottom10_countries) +
  geom_point(aes(x = economy_gdp_per_capita,
                 y = happiness_score,
                 size = happiness_score,
                 colour = factor(region)),
             alpha = 0.85) +  # fixed transparency, so set outside aes()
  scale_size_continuous(range = c(2, 15)) +
  # Dashed reference lines that split the panel into the four labelled areas
  geom_vline(xintercept = 1.4, colour = "#f7347a", linetype = "longdash") +
  geom_hline(yintercept = 5, colour = "#f7347a", linetype = "longdash") +
  geom_text(aes(x = economy_gdp_per_capita, y = happiness_score, label = country),
            hjust = "left",
            vjust = "bottom",
            check_overlap = TRUE,
            size = 3) +
  theme(legend.position = "none") +
  # Title now states the 10 + 10 countries that are actually selected above
  labs(title = "Happiness vs. GDP per capita for Top 10 and Bottom 10 countries in 2022",
       x = "GDP per capita",
       y = "Happiness score") +
  annotate("text", x = 0.83, y = 5.2, family = "Helvetica", size = 2.7, color = "gray20",
           label = "Lower GDP per capita") +
  annotate("text", x = 1.95, y = 5.2, family = "Helvetica", size = 2.7, color = "gray20",
           label = "Higher GDP per capita") +
  annotate("text", x = 1.53, y = 2.3, family = "Helvetica", size = 2.7, color = "gray20",
           label = "Lower Happiness") +
  annotate("text", x = 1.53, y = 8, family = "Helvetica", size = 2.7, color = "gray20",
           label = "Higher Happiness")

# Getting top 10 countries.
# `with_ties = FALSE` guarantees exactly 10 rows, and `ties.method = "min"`
# keeps the rank shown in the label a whole number when scores are tied.
whr2015_2022_top10 <- whr2022 %>%
  slice_max(happiness_score, n = 10, with_ties = FALSE) %>%
  mutate(cat = "top_10",
         country_rank = rank(-happiness_score, ties.method = "min"),
         country_label = paste0(country, " (", country_rank, ")"))

# Getting bottom 10 countries. Rank 1 is the lowest score; the rank is
# computed over all 2022 rows before the 10 lowest are kept.
whr2015_2022_bottom10 <- whr2022 %>%
  mutate(country_rank = rank(happiness_score, ties.method = "min"),
         country_label = paste0(country, " (", country_rank, ")")) %>%
  slice_min(happiness_score, n = 10, with_ties = FALSE) %>%
  mutate(cat = "bottom_10")

# Build a horizontal rounded-bar chart of happiness scores on a 0-10 scale.
#
# data            Data frame with `country_label` and `happiness_score`.
# low, high       End colours of the diverging fill scale (midpoint 5, white).
# title, subtitle Plot title and subtitle.
# lowest_on_top   If TRUE the lowest score is drawn at the top of the chart,
#                 otherwise the highest score is.
#
# Returns the ggplot object.
plot_ranked_countries <- function(data, low, high, title, subtitle,
                                  lowest_on_top = FALSE) {
  # After coord_flip() the last factor level is drawn at the top, so the
  # sign of the ordering key decides which end of the ranking leads.
  order_sign <- if (lowest_on_top) -1 else 1

  ggplot(data, aes(x = reorder(country_label, order_sign * happiness_score))) +
    # Full-length background bar that marks the maximum of 10
    geom_chicklet(aes(y = 10, fill = 4.9), width = 0.5, radius = grid::unit(5, "pt")) +
    geom_chicklet(aes(y = happiness_score, fill = happiness_score), width = 0.5, radius = grid::unit(5, "pt")) +
    geom_text(aes(y = happiness_score, label = round(happiness_score, 2)), nudge_y = 0.4, size = 3) +
    scale_y_continuous(expand = c(0, 0.1), position = "right", limits = c(0, 10)) +
    scale_fill_gradient2(low = low, high = high, mid = "white", midpoint = 5) +
    coord_flip() +
    labs(y = "Best possible life = 10", x = "",
         title = title,
         subtitle = subtitle,
         caption = "Source: The World Happiness Report 2022") +
    theme_ipsum(grid = "") +
    theme(plot.title = element_text(size = 15),
          plot.subtitle = element_text(size = 12),
          plot.caption = element_text(size = 10),
          axis.title.x = element_text(size = 10, color = "#555955"),
          axis.text.y = element_text(size = 10, color = "black"),
          axis.text.x = element_blank(),
          # "none" is the valid spelling; the previous 'None' is not
          legend.position = "none")
}

# Plotting top 10 happiest countries
top_10 <- plot_ranked_countries(
  whr2015_2022_top10,
  low = "black",
  high = "#818aeb",
  title = "Top 10 Happiest Countries in 2022",
  subtitle = "8 of the happiest countries present in Europe"
)

# Plotting 10 saddest countries
bottom_10 <- plot_ranked_countries(
  whr2015_2022_bottom10,
  low = "#074040",
  high = "#4cc2c2",
  title = "Top 10 Saddest Countries in 2022",
  subtitle = "Ordered from saddest to less sad",
  lowest_on_top = TRUE
)

# Displaying plots side by side
top_10 + bottom_10

# Attach each region's mean 2022 happiness score to its rows and derive a
# region factor ordered by that mean, so regions plot from lowest to highest
whr2022_sorted <- whr2022 %>%
  group_by(region) %>%
  mutate(avg_happiness = mean(happiness_score)) %>%
  ungroup() %>%
  mutate(region_sorted = reorder(region, avg_happiness))

# One point per country with a boxplot per region drawn over the points.
# `text = country` is mapped only on the point layer (with an explicit
# region group): as a plot-level aesthetic it becomes part of the default
# grouping, which makes the boxplot statistics run per country instead of
# per region. Transparency is a fixed setting, so it is given outside aes();
# the translucent box fill keeps the points underneath visible.
region_level <- ggplot(whr2022_sorted, aes(x = region_sorted, y = happiness_score)) +
  geom_beeswarm(aes(color = region_sorted, text = country, group = region_sorted)) +
  labs(title = "Country-wise happiness trends in world regions",
       x = "Region",
       y = "Happiness score") +
  geom_hline(yintercept = 5, colour = "#f7347a", linetype = "longdash") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 0, hjust = 1, size = 8)) +
  scale_x_discrete(labels = wrap_format(10)) +
  scale_fill_brewer(palette = "Spectral") +
  scale_color_brewer(palette = "Spectral") +
  geom_boxplot(aes(fill = region_sorted), alpha = 0.5)

# Convert to ggplotly with tooltips
# NOTE(review): confirm that ggplotly() resolves these variable names; its
# documentation describes `tooltip` in terms of aesthetic names ("text", "y").
ggplotly(region_level, tooltip = c("country", "happiness_score"))